The tidyverse is a collection of packages for doing data science:

It includes a website: http://tidyverse.org and a book: http://r4ds.had.co.nz

At the heart of the tidyverse is tidy data. Every variable is a column, every row is a case.

Base R - we could use built-in functions like aggregate(), by(), ave()

Data are here for this example:

https://archive.ics.uci.edu/ml/datasets/Bank+Marketing

https://archive.ics.uci.edu/ml/machine-learning-databases/00222/bank.zip

Adaptation of examples by nightrose, Data Scientist, AIG New York

Read data via readr

# getwd()
library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(readr)
# Read the semicolon-delimited bank marketing file into `bankData`.
bankData <- read_delim(file = "data/bank-full.csv", delim = ";")
Parsed with column specification:
cols(
  age = col_integer(),
  job = col_character(),
  marital = col_character(),
  education = col_character(),
  default = col_character(),
  balance = col_integer(),
  housing = col_character(),
  loan = col_character(),
  contact = col_character(),
  day = col_integer(),
  month = col_character(),
  duration = col_integer(),
  campaign = col_integer(),
  pdays = col_integer(),
  previous = col_integer(),
  poutcome = col_character(),
  y = col_character()
)
# Print the data set that was just read.
bankData
# let us explore the data set a bit
View(bankData)# allows us to view the data set (intended for interactive sessions)
names(bankData)  # names of the variables (columns)
 [1] "age"       "job"       "marital"   "education" "default"  
 [6] "balance"   "housing"   "loan"      "contact"   "day"      
[11] "month"     "duration"  "campaign"  "pdays"     "previous" 
[16] "poutcome"  "y"        
dim(bankData)  # dimensions: number of rows, then number of columns
[1] 45211    17
str(bankData)  # structure of the data set: type and first values of each column
Classes ‘tbl_df’, ‘tbl’ and 'data.frame':   45211 obs. of  17 variables:
 $ age      : int  58 44 33 47 33 35 28 42 58 43 ...
 $ job      : chr  "management" "technician" "entrepreneur" "blue-collar" ...
 $ marital  : chr  "married" "single" "married" "married" ...
 $ education: chr  "tertiary" "secondary" "secondary" "unknown" ...
 $ default  : chr  "no" "no" "no" "no" ...
 $ balance  : int  2143 29 2 1506 1 231 447 2 121 593 ...
 $ housing  : chr  "yes" "yes" "yes" "yes" ...
 $ loan     : chr  "no" "no" "yes" "no" ...
 $ contact  : chr  "unknown" "unknown" "unknown" "unknown" ...
 $ day      : int  5 5 5 5 5 5 5 5 5 5 ...
 $ month    : chr  "may" "may" "may" "may" ...
 $ duration : int  261 151 76 92 198 139 217 380 50 55 ...
 $ campaign : int  1 1 1 1 1 1 1 1 1 1 ...
 $ pdays    : int  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
 $ previous : int  0 0 0 0 0 0 0 0 0 0 ...
 $ poutcome : chr  "unknown" "unknown" "unknown" "unknown" ...
 $ y        : chr  "no" "no" "no" "no" ...
 - attr(*, "spec")=List of 2
  ..$ cols   :List of 17
  .. ..$ age      : list()
  .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
  .. ..$ job      : list()
  .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
  .. ..$ marital  : list()
  .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
  .. ..$ education: list()
  .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
  .. ..$ default  : list()
  .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
  .. ..$ balance  : list()
  .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
  .. ..$ housing  : list()
  .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
  .. ..$ loan     : list()
  .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
  .. ..$ contact  : list()
  .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
  .. ..$ day      : list()
  .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
  .. ..$ month    : list()
  .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
  .. ..$ duration : list()
  .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
  .. ..$ campaign : list()
  .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
  .. ..$ pdays    : list()
  .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
  .. ..$ previous : list()
  .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
  .. ..$ poutcome : list()
  .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
  .. ..$ y        : list()
  .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
  ..$ default: list()
  .. ..- attr(*, "class")= chr  "collector_guess" "collector"
  ..- attr(*, "class")= chr "col_spec"
class(bankData)  # class vector of the object returned by read_delim()
[1] "tbl_df"     "tbl"        "data.frame"
head(bankData, n = 5)  # first five rows
tail(bankData, n = 5)  # last five rows
summary(bankData)  # per-column summaries (quartiles for numbers, length/class for text)
      age            job              marital         
 Min.   :18.00   Length:45211       Length:45211      
 1st Qu.:33.00   Class :character   Class :character  
 Median :39.00   Mode  :character   Mode  :character  
 Mean   :40.94                                        
 3rd Qu.:48.00                                        
 Max.   :95.00                                        
  education           default             balance      
 Length:45211       Length:45211       Min.   : -8019  
 Class :character   Class :character   1st Qu.:    72  
 Mode  :character   Mode  :character   Median :   448  
                                       Mean   :  1362  
                                       3rd Qu.:  1428  
                                       Max.   :102127  
   housing              loan             contact         
 Length:45211       Length:45211       Length:45211      
 Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character  
                                                         
                                                         
                                                         
      day           month              duration     
 Min.   : 1.00   Length:45211       Min.   :   0.0  
 1st Qu.: 8.00   Class :character   1st Qu.: 103.0  
 Median :16.00   Mode  :character   Median : 180.0  
 Mean   :15.81                      Mean   : 258.2  
 3rd Qu.:21.00                      3rd Qu.: 319.0  
 Max.   :31.00                      Max.   :4918.0  
    campaign          pdays          previous       
 Min.   : 1.000   Min.   : -1.0   Min.   :  0.0000  
 1st Qu.: 1.000   1st Qu.: -1.0   1st Qu.:  0.0000  
 Median : 2.000   Median : -1.0   Median :  0.0000  
 Mean   : 2.764   Mean   : 40.2   Mean   :  0.5803  
 3rd Qu.: 3.000   3rd Qu.: -1.0   3rd Qu.:  0.0000  
 Max.   :63.000   Max.   :871.0   Max.   :275.0000  
   poutcome              y            
 Length:45211       Length:45211      
 Class :character   Class :character  
 Mode  :character   Mode  :character  
                                      
                                      
                                      
# Histogram of the age column with an explicit title and x-axis label.
hist(
  x = bankData$age,
  xlab = "Age in Years",
  main = "Histogram of Age"
)

# Blue box plot of the age column; toupper() upper-cases the title text.
boxplot(
  x = bankData$age,
  col = "blue",
  ylab = "Age in years",
  main = toupper("Boxplot of Age")
)

# Kernel density estimate of age: compute it, draw the curve, then overlay a
# filled polygon (red fill, blue outline) on the existing plot. polygon() adds
# to the plot opened by plot(), so the order of these calls matters.
d <- density(bankData$age)
plot(d, main = "Kernel density of Age")
polygon(d, col = "red", border = "blue")

Seven dplyr verbs and their descriptions (the fundamental functions of data transformation): select() selects columns/variables; filter() filters rows and provides basic filtering capabilities; arrange() re-orders or arranges rows; mutate() creates new columns/variables; summarise() summarises values/data with functions of your choice; group_by() allows for group operations in the “split-apply-combine” concept by grouping data on categorical levels; join() joins separate data frames.

The filter() verb keeps only the rows that satisfy the conditions you supply.

Subsetting Example 1 Filter

# Keep only the rows whose `default` column equals "yes".
filter(bankData, default == 'yes')

Subsetting Example 2 Filter

# Keep only the rows with a balance strictly below 1000.
filter(bankData, balance < 1000)

Subsetting Example 3

# Rows from April, May or June where the client has defaulted. Conditions
# separated by commas are combined with AND.
# Months are stored as lower-case three-letter abbreviations (e.g. "may"),
# so April has to be written "apr"; the previous "april" matched no rows.
filter(bankData, month %in% c("apr", "may", "jun"),
       default == "yes")

You can also extract particular rows by number using slice().

# Extract rows 5 through 10 by position.
slice(bankData, 5:10)

You can use the select() verb to specify which columns of a dataset you want

Select Example 1

# Keep only the five named columns, in the order listed.
select(bankData, age, job, default, balance, housing)

Select Example 2

# Keep the run of columns from `default` to `duration`, plus every column
# whose name contains the letter "p".
select(bankData, default:duration, contains("p"))

Use the rename() verb to easily rename variables. Select Example 3: select() can also rename a variable, but it keeps only the columns you mention.

# Select column `y` and rename it to `bought_option` in one step; the other
# columns are not part of the selection.
select(bankData, bought_option=y)

Rename Example

# Rename column `y` to `bought_option` (new_name = old_name).
rename(bankData, bought_option=y)

You can reorder your dataset based on conditions using the arrange() verb

Arrange Example 1

# Sort by job, breaking ties by default.
arrange(bankData, job, default)

Arrange Example 2

# Sort by balance in ascending order, breaking ties by default.
arrange(bankData, balance, default)

Arrange Example 3 You can use desc() to sort in descending order.

# Sort by balance from largest to smallest, breaking ties by default.
arrange(bankData, desc(balance), default)

Transformations

The mutate() verb can be used to make new columns

# Add a numeric indicator column: 1 where default is "yes", 0 otherwise.
mutate(
  bankData,
  DefaultFlag = ifelse(default == "yes", 1, 0)
)

Transformations 2

# Add the ratio of balance to call duration as a new column.
# summary(bankData) reports a minimum duration of 0, and dividing by zero
# gives Inf/-Inf/NaN, so zero durations are converted to NA first; the ratio
# is then NA for those rows instead of an infinite or undefined value.
mutate(bankData, BalanceByDuration = balance / na_if(duration, 0))

mutate() retains all columns. If you only want to keep the new transforms, you can use transmute()

Transmute Example

# Same ratio as a stand-alone result: transmute() keeps only the new column.
# summary(bankData) reports a minimum duration of 0, and dividing by zero
# gives Inf/-Inf/NaN, so zero durations are converted to NA first; the ratio
# is then NA for those rows instead of an infinite or undefined value.
transmute(bankData, BalanceByDuration = balance / na_if(duration, 0))

Summarise Data by Groups The group_by verb creates a grouping by a categorical variable

# Show the formal arguments of group_by().
args(group_by)
function (.data, ..., add = FALSE) 
NULL

Example group_by 1

# Number of rows in each `default` group.
summarise(group_by(bankData, default), Num = n())

Example group_by 2

# Mean balance in each `default` group.
summarise(group_by(bankData, default), Ave.Balance = mean(balance))

Example group_by 3

# Mean balance and row count in each `default` group, computed in one call.
summarise(group_by(bankData, default), Ave.Balance = mean(balance), Num = n())

Chaining/Piping The pipe operator %>% Passes result on left into first argument of function on right. Piping is not restricted to dplyr manipulation tasks

Take the hflights data set and then… add a variable named diff that is the result of subtracting TaxiIn from TaxiOut, and then… pick all of the rows whose diff value is not NA, and then… summarise the data set with a value named avg that is the mean diff value: hflights %>% mutate(diff = TaxiOut - TaxiIn) %>% filter(!is.na(diff)) %>% summarise(avg = mean(diff))

Standard

# Nested form: read inside-out — select four columns, keep the rows where
# default is "yes", then sort by job, education and age.
arrange(filter(select(bankData, age, job, education, default), default == 'yes'), job, education, age)

Standard (the same nested call formatted over several lines; still no pipes)

# The same nested call as the one-line version, only spread over several
# lines; the innermost call (select) still runs first.
arrange(
  filter(
    select(bankData, age, job, education, default), 
    default == 'yes'), 
  job, education, age)

Piping example The pipe operator is very helpful for group by summaries

# Piped form of the same query; each step receives the previous result as
# its first argument, so the steps read top to bottom.
bankData %>% 
  select(age, job, education, default) %>%
  filter(default == 'yes') %>%
  arrange(job, education, age)

No Pipes

# Two vectors of ten standard-normal draws each (no seed is set, so the
# values differ between runs), then the Euclidean distance between them.
x1 <- rnorm(10)
x2 <- rnorm(10)
sqrt(sum((x1 - x2)^2))
[1] 3.844407

With Pipes

# Same distance with pipes. `^` binds more tightly than `%>%`, so the squared
# differences (not the bare 2) are what gets piped into sum().
(x1 - x2)^2 %>% sum() %>% sqrt()
[1] 3.844407

Pipe + group_by() The pipe operator is very helpful for group by summaries

# Per-job summary: row count, mean balance, number of defaulted clients and
# the resulting default rate. The last column reuses the two counts defined
# just before it in the same summarise() call.
bankData %>%
  group_by(job) %>%
  summarise(
    Number = n(),
    Average.Balance = mean(balance),
    Number.Defaulted = sum(default == "yes"),
    Default.Rate = Number.Defaulted / Number
  )

Pipe and Plotting

library(ggplot2)
# Count clients per job/marital combination for three job types and draw the
# counts as side-by-side bars, one colour per marital status.
bankData %>%
  filter(job %in% c("management", "technician", "unemployed")) %>%
  group_by(job, marital) %>%
  summarise(Counts = n()) %>%
  ggplot(mapping = aes(x = job, y = Counts, fill = marital)) +
  geom_bar(stat = "identity", position = "dodge")

Piping: Unique Values. Piping is also very helpful for identifying unique rows. You can use distinct() to identify unique rows; it is typically used together with arrange().

# Unique combinations of the seven selected columns, sorted by those columns.
bankData %>% 
  select(job, marital, education, default, housing, loan, contact) %>%
  arrange(job, marital, education, default, housing, loan, contact) %>%
  distinct()

Unique Keys You can specify variables that you only want unique values for.

# Unique job/marital/education combinations.
# NOTE(review): in dplyr versions where distinct() has a `.keep_all` argument
# (default FALSE), only the three key columns are returned here — confirm
# that dropping the other selected columns is intended.
bankData %>% 
  select(job, marital, education, default, housing, loan, contact) %>%
  arrange(job, marital, education, default, housing, loan, contact) %>%
  distinct(job, marital, education)

Unique Keys It will keep the first row with those particular key values.

# Within each job/marital/education key, sort default, housing, loan and
# contact in descending order, then keep the first row per key.
# `.keep_all = TRUE` retains the non-key columns; without it distinct()
# returns only the three key columns, so the desc() ordering would have no
# visible effect and "keeps the first row" could not be observed.
bankData %>%
  select(job, marital, education, default, housing, loan, contact) %>%
  arrange(job, marital, education,
          desc(default), desc(housing), desc(loan), desc(contact)) %>%
  distinct(job, marital, education, .keep_all = TRUE)

Multiple Columns: You can summarise or mutate multiple columns using the same grouping variable. summarise_each() allows you to apply the same summary function to multiple columns; mutate_each() does a similar manipulation for mutate(). (In current dplyr releases both are deprecated in favour of across().)

# Open the help page for summarise_each().
help(summarise_each)

Summarise_each Example

# Mean of balance and of duration within each education level.
# NOTE(review): summarise_each() and funs() are deprecated in newer dplyr
# releases — confirm against the installed version before reusing.
bankData %>%
  group_by(education) %>%
  summarise_each(funs(mean), balance, duration)

summarise_each Example 2 You can also use multiple functions.

# Minimum, mean and maximum of balance and of duration per education level.
# NOTE(review): summarise_each() and funs() are deprecated in newer dplyr
# releases — confirm against the installed version before reusing.
bankData %>%
  group_by(education) %>%
  summarise_each(funs(min, mean, max), balance, duration)

mutate_each Example You can use the . to indicate where the variables go in an arbitrary function.

# Halve balance and duration; inside funs() the `.` stands for the column
# being transformed. select() names only two columns, and the grouping column
# `month` is added back automatically (see the message printed after the call).
bankData %>% 
  group_by(month) %>% 
  select(balance, duration) %>% 
  mutate_each(funs(half = ./2))
Adding missing grouping variables: `month`

Additional Helper Functions: The helper functions n() and count() count the number of rows in a group. The helper function n_distinct(vector) counts the number of unique items in that vector.

# Number of distinct education values within each job/default group.
bankData %>% 
  group_by(job, default) %>%
  summarise(education_levels = n_distinct(education))

tally tally() is a shortcut for counting

# Row count per job using the tally() shortcut.
bankData %>% group_by(job) %>%
  tally()

Without tally()

# The same count spelled out with summarise() and n().
bankData %>% group_by(job) %>%
  summarise(n = n())

count count() makes it even easier.

# count() groups by job and counts the rows in a single call.
bankData %>% count(job)

Ranking Variables In base R, you can use rank.

# Show the formal arguments of base R's rank(), including its tie methods.
args(rank)
function (x, na.last = TRUE, ties.method = c("average", "first", 
    "random", "max", "min")) 
NULL

Rank Examples

# Compare base rank() with dplyr's ranking helpers, all applied to the job
# column of the first ten rows; transmute() keeps only the columns built here.
bankData %>% slice(1:10) %>% 
  transmute(Job = job,
            jobRankAvg = rank(job), 
            jobRankRow = row_number(job), 
            jobRankMin = min_rank(job),
            jobRankDense = dense_rank(job),
            jobRankPerc = percent_rank(job),
            jobRankCume = cume_dist(job))

Applying custom functions You can also apply your own custom functions using do()

# Simulated panel: 10 house IDs x 10 years (1995-2004). A price is kept only
# where its uniform draw is at most 0.50 and is NA otherwise. The seed fixes
# the draws; the uniform draws are generated before the normal ones.
set.seed(1)
df <- data.frame(
  houseID = rep(seq_len(10), each = 10),
  year = 1995:2004,
  price = ifelse(runif(100) <= 0.50, exp(rnorm(100)), NA)
)
head(df)

Grouped Tests

# For married and single clients only, run a t-test of age by marital status
# within each job group. do() stores each test object in the `tTest` column;
# mutate() then pulls the p-value and the test statistic out of that object
# with get().
bankData %>% 
  filter(marital %in% c('married', 'single')) %>% 
  group_by(job) %>% 
  do(tTest = t.test(age ~ marital, data = .)) %>%
  mutate("tTestPVal" = get("p.value",tTest), "tTestStat" = get("statistic", tTest))

Changing gears… Let’s take a look at a new dataset:

Data are from https://stat.duke.edu/~mc301/data/hdi.csv

The Human Development Index (HDI) is a composite statistic of life expectancy, education, and per capita income indicators, which are used to rank countries into four tiers of human development.

Adaptation of an example by Mine Cetinkaya-Rundel

tidyr

A package that reshapes the layout of tabular data.

http://vita.had.co.nz/papers/tidy-data.html

tidyr Operations

There are four fundamental functions of data tidying:

gather() takes multiple columns and gathers them into key-value pairs: it makes “wide” data longer. spread() takes two columns (key and value) and spreads them into multiple columns: it makes “long” data wider. separate() splits a single column into multiple columns. unite() combines multiple columns into a single column.


# Download the HDI table straight from the URL (needs network access).
hdi <- read_csv("https://stat.duke.edu/~mc301/data/hdi.csv")

# Inspect it in the data viewer (intended for interactive sessions).
View(hdi)

Wide to long data with gather

library(tidyr)
library(stringr)
# Wide to long: stack the columns hdi_1980 through hdi_2011 into a `year` key
# column and an `hd_index` value column.
# NOTE(review): newer tidyr releases recommend pivot_longer() over gather() —
# confirm against the installed version.
hdi_long <- gather(hdi, key = year, value = hd_index, hdi_1980:hdi_2011)

View(hdi_long)

Let’s do a little bit better…

# Strip the "hdi_" prefix from the key so `year` becomes a plain number.
hdi_long2 <- hdi_long %>%
  mutate(year = as.numeric(str_replace(year, "hdi_", "")))

View(hdi_long2)

Long to wide data with spread

# Long to wide: one column per year, filled with the hd_index values.
# NOTE(review): newer tidyr releases recommend pivot_wider() over spread() —
# confirm against the installed version.
hdi_wide <- spread(hdi_long2, key = year, value = hd_index)

View(hdi_wide)

This is just a small peek at the functions and power of the tidyverse.

LS0tCnRpdGxlOiAiSW50cm9kdWN0aW9uIHRvIFRpZHl2ZXJzZSIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3Igc2V0dXAsIGluY2x1ZGUgPSBGQUxTRX0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQpgYGAKCgpUaGUgdGlkeXZlcnNlIGlzIGEgY29sbGVjdGlvbiBvZiBwYWNrYWdlcyBmb3IgZG9pbmcgZGF0YSBzY2llbmNlOgoKIVtdKG5vdGVib29rcy10aWR5dmVyc2UucG5nKQoKSXQgaW5jbHVkZXMgYSB3ZWJzaXRlOiBodHRwOi8vdGlkeXZlcnNlLm9yZyBhbmQgYSBib29rOiBodHRwOi8vcjRkcy5oYWQuY28ubnoKCkF0IHRoZSBoZWFydCBvZiB0aGUgdGlkeXZlcnNlIGlzIHRpZHkgZGF0YS4gRXZlcnkgdmFyaWFibGUgaXMgYSBjb2x1bW4sIGV2ZXJ5IHJvdyBpcyBhIGNhc2UuCgpCYXNlIFIgLSB3ZSBjb3VsZCB1c2UgYnVpbHQtaW4gZnVuY3Rpb25zIGxpa2UgYWdncmVnYXRlKCksIGJ5KCksIGF2ZSgpCgpEYXRhIGFyZSBoZXJlIGZvciB0aGlzIGV4YW1wbGU6CgpodHRwczovL2FyY2hpdmUuaWNzLnVjaS5lZHUvbWwvZGF0YXNldHMvQmFuaytNYXJrZXRpbmcKCmh0dHBzOi8vYXJjaGl2ZS5pY3MudWNpLmVkdS9tbC9tYWNoaW5lLWxlYXJuaW5nLWRhdGFiYXNlcy8wMDIyMi9iYW5rLnppcAoKQWRhcHRpb24gb2YgZXhhbXBsZXMgYnkgbmlnaHRyb3NlIERhdGEgU2NpZW50aXN0IEFJRyBOZXcgWW9yawoKUmVhZCBkYXRhIHZpYSByZWFkcgoKYGBge3J9CiMgZ2V0d2QoKQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KHJlYWRyKQpiYW5rRGF0YSA8LSByZWFkX2RlbGltKCJkYXRhL2JhbmstZnVsbC5jc3YiLCAiOyIpCmJhbmtEYXRhCiMgbGV0IHVzIGV4cGxvcmUgdGhlIGRhdGEgc2V0IGEgYml0ClZpZXcoYmFua0RhdGEpIyBhbGxvd3MgdXMgdG8gdmlldyB0aGUgZGF0YSBzZXQKbmFtZXMoYmFua0RhdGEpICAjIG5hbWVzIG9mIHRoZSB2YXJpYWJsZXMKZGltKGJhbmtEYXRhKSAgIyBkaW1lbnNpb24gKG51bWJlciBvZiByb3dzIGFuZCBjb2x1bW5zKQpzdHIoYmFua0RhdGEpICAjIHN0cnVjdHVyZSBvZiB0aGUgZGF0YSBzZXQKY2xhc3MoYmFua0RhdGEpCmhlYWQoYmFua0RhdGEsIG4gPSA1KQp0YWlsKGJhbmtEYXRhLCBuID0gNSkKc3VtbWFyeShiYW5rRGF0YSkKYGBgCgpgYGB7cn0KaGlzdChiYW5rRGF0YSRhZ2UsCiAgICAgbWFpbiA9ICJIaXN0b2dyYW0gb2YgQWdlIiwKICAgICB4bGFiID0gIkFnZSBpbiBZZWFycyIpCmBgYAoKYGBge3J9CmJveHBsb3QoYmFua0RhdGEkYWdlLAogICAgICAgIG1haW4gPSB0b3VwcGVyKCJCb3hwbG90IG9mIEFnZSIpLAogICAgICAgIHlsYWIgPSAiQWdlIGluIHllYXJzIiwKICAgICAgICBjb2wgPSAiYmx1ZSIpCmBgYApgYGB7cn0KZCA8LSBkZW5zaXR5KGJhbmtEYXRhJGFnZSkKcGxvdChkLCBtYWluID0gIktlcm5lbCBkZW5zaXR5IG9mIEFnZSIpCnBvbHlnb24oZCwgY29sID0gInJlZCIsIGJvcmRlciA9ICJibHVlIikKYGBgCgpTZXZlbiBkcGx5ciB2ZXJicyAmIGRl
c2NyaXB0aW9uIGZ1bmRhbWVudGFsIGZ1bmN0aW9ucyBvZiBkYXRhIHRyYW5zZm9ybWF0aW9uCnNlbGVjdCgpIHNlbGVjdCBjb2x1bW5zL3ZhcmlhYmxlcwpmaWx0ZXIoKSBmaWx0ZXIgcm93cyAvIHByb3ZpZGVzIGJhc2ljIGZpbHRlcmluZyBjYXBhYmlsaXRpZXMKYXJyYW5nZSgpIHJlLW9yZGVyIG9yIGFycmFuZ2Ugcm93cyAvb3JkZXJpbmcgZGF0YQptdXRhdGUoKSBjcmVhdGUgbmV3IGNvbHVtbnMvdmFyaWFibGVzCnN1bW1hcmlzZSgpIHN1bW1hcmlzZSB2YWx1ZXMvZGF0YSBieSBmdW5jdGlvbnMgb2YgY2hvaWNlCmdyb3VwX2J5KCkgYWxsb3dzIGZvciBncm91cCBvcGVyYXRpb25zIGluIHRoZSDigJxzcGxpdC1hcHBseS1jb21iaW5l4oCdIGNvbmNlcHQgLyBncm91cHMgZGF0YSBieSBjYXRlZ29yaWNhbCBsZXZlbHMKam9pbigpIGpvaW5pbmcgc2VwYXJhdGUgZGF0YWZyYW1lcwoKVGhlIGZpbHRlciB2ZXJiIHRha2VzIGNvbmRpdGlvbnMgZm9yIGZpbHRlcmluZyByb3dzIGJhc2VkIG9uCmNvbmRpdGlvbnMKClN1YnNldHRpbmcgRXhhbXBsZSAxCkZpbHRlcgoKYGBge3J9CmZpbHRlcihiYW5rRGF0YSwgZGVmYXVsdCA9PSAneWVzJykKYGBgCgpTdWJzZXR0aW5nIEV4YW1wbGUgMgpGaWx0ZXIKCmBgYHtyfQpmaWx0ZXIoYmFua0RhdGEsIGJhbGFuY2UgPCAxMDAwKQpgYGAKClN1YnNldHRpbmcgRXhhbXBsZSAzCgpgYGB7cn0KZmlsdGVyKGJhbmtEYXRhLCBtb250aCAlaW4lIGMoImFwcmlsIiwgIm1heSIsICJqdW4iKSwgCiAgICAgICBkZWZhdWx0ID09ICJ5ZXMiKQpgYGAKCllvdSBjYW4gYWxzbyBleHRyYWN0IHBhcnRpY3VsYXIgcm93cyBieSBudW1iZXIgdXNpbmcgc2xpY2UoKS4KCmBgYHtyfQpzbGljZShiYW5rRGF0YSwgNToxMCkKYGBgCgpZb3UgY2FuIHVzZSB0aGUgc2VsZWN0KCkgdmVyYiB0byBzcGVjaWZ5IHdoaWNoIGNvbHVtbnMgb2YgYQpkYXRhc2V0IHlvdSB3YW50CgpTZWxlY3QgRXhhbXBsZSAxCgpgYGB7cn0Kc2VsZWN0KGJhbmtEYXRhLCBhZ2UsIGpvYiwgZGVmYXVsdCwgYmFsYW5jZSwgaG91c2luZykKYGBgCgpTZWxlY3QgRXhhbXBsZSAyCgpgYGB7cn0Kc2VsZWN0KGJhbmtEYXRhLCBkZWZhdWx0OmR1cmF0aW9uLCBjb250YWlucygicCIpKQpgYGAKClJlbmFtZSB2ZXJiIHRvIGVhc2lseSByZW5hbWUgdmFyaWFibGVzClNlbGVjdCBFeGFtcGxlIDMgCgoKYGBge3J9CnNlbGVjdChiYW5rRGF0YSwgYm91Z2h0X29wdGlvbj15KQpgYGAKClJlbmFtZSBFeGFtcGxlCgpgYGB7cn0KcmVuYW1lKGJhbmtEYXRhLCBib3VnaHRfb3B0aW9uPXkpCmBgYAoKCllvdSBjYW4gcmVvcmRlciB5b3VyIGRhdGFzZXQgYmFzZWQgb24gY29uZGl0aW9ucyB1c2luZyB0aGUKYXJyYW5nZSgpIHZlcmIKCkFycmFuZ2UgRXhhbXBsZSAxCgoKYGBge3J9CmFycmFuZ2UoYmFua0RhdGEsIGpvYiwgZGVmYXVsdCkKYGBgCgpBcnJhbmdlIEV4YW1wbGUgMgoKYGBge3J9CmFycmFuZ2UoYmFua0RhdGEsIGJhbGFuY2UsIGRlZmF1bHQpCmBgYAoKCgpB
cnJhbmdlIEV4YW1wbGUgMwpZb3UgY2FuIHVzZSBkZXNjKCkgdG8gc29ydCBpbiBkZXNjZW5kaW5nIG9yZGVyLgoKCmBgYHtyfQphcnJhbmdlKGJhbmtEYXRhLCBkZXNjKGJhbGFuY2UpLCBkZWZhdWx0KQpgYGAKClRyYW5zZm9ybWF0aW9ucwoKVGhlIG11dGF0ZSgpIHZlcmIgY2FuIGJlIHVzZWQgdG8gbWFrZSBuZXcgY29sdW1ucwoKYGBge3J9Cm11dGF0ZShiYW5rRGF0YSwgIkRlZmF1bHRGbGFnIiA9IGlmZWxzZShkZWZhdWx0ID09ICd5ZXMnLCAxLCAwKSkKYGBgCgpUcmFuc2Zvcm1hdGlvbnMgMgoKYGBge3J9Cm11dGF0ZShiYW5rRGF0YSwgIkJhbGFuY2VCeUR1cmF0aW9uIiA9IGJhbGFuY2UvZHVyYXRpb24pCmBgYAoKbXV0YXRlKCkgcmV0YWlucyBhbGwgY29sdW1ucy4gSWYgeW91IG9ubHkgd2FudCB0byBrZWVwIHRoZSBuZXcKdHJhbnNmb3JtcywgeW91IGNhbiB1c2UgdHJhbnNtdXRlKCkKClRyYW5zbXV0ZSBFeGFtcGxlCgoKYGBge3J9CnRyYW5zbXV0ZShiYW5rRGF0YSwgIkJhbGFuY2VCeUR1cmF0aW9uIiA9IGJhbGFuY2UvZHVyYXRpb24pCmBgYAoKClN1bW1hcmlzZSBEYXRhIGJ5IEdyb3VwcwpUaGUgZ3JvdXBfYnkgdmVyYiBjcmVhdGVzIGEgZ3JvdXBpbmcgYnkgYSBjYXRlZ29yaWNhbAp2YXJpYWJsZQoKYGBge3J9CmFyZ3MoZ3JvdXBfYnkpCmBgYAoKRXhhbXBsZSBncm91cF9ieSAxCgoKYGBge3J9CnN1bW1hcmlzZShncm91cF9ieShiYW5rRGF0YSwgZGVmYXVsdCksIE51bSA9IG4oKSkKYGBgCgpFeGFtcGxlIGdyb3VwX2J5IDIKCgpgYGB7cn0Kc3VtbWFyaXNlKGdyb3VwX2J5KGJhbmtEYXRhLCBkZWZhdWx0KSwgQXZlLkJhbGFuY2UgPSBtZWFuKGJhbGFuY2UpKQpgYGAKCkV4YW1wbGUgZ3JvdXBfYnkgMwoKCmBgYHtyfQpzdW1tYXJpc2UoZ3JvdXBfYnkoYmFua0RhdGEsIGRlZmF1bHQpLCBBdmUuQmFsYW5jZSA9IG1lYW4oYmFsYW5jZSksIE51bSA9IG4oKSkKYGBgCgoKQ2hhaW5pbmcvUGlwaW5nClRoZSBwaXBlIG9wZXJhdG9yICU+JQpQYXNzZXMgcmVzdWx0IG9uIGxlZnQgaW50byBmaXJzdCBhcmd1bWVudCBvZiBmdW5jdGlvbiBvbiByaWdodC4KUGlwaW5nIGlzIG5vdCByZXN0cmljdGVkIHRvIGRwbHlyIG1hbmlwdWxhdGlvbiB0YXNrcwoKVGFrZSB0aGUgaGZsaWdodHMgZGF0YSBzZXQgYW5kIHRoZW7igKZBZGQgYSB2YXJpYWJsZSBuYW1lZCBkaWZmIHRoYXQgaXMgdGhlIHJlc3VsdCBvZiBzdWJ0cmFjdGluZyBUYXhpSW4gZnJvbSBUYXhpT3V0LCBhbmQgdGhlbuKApiBQaWNrIGFsbCBvZiB0aGUgcm93cyB3aG9zZSBkaWZmIHZhbHVlIGRvZXMgbm90IGVxdWFsIE5BLCBhbmQgdGhlbuKApiBTdW1tYXJpc2UgdGhlIGRhdGEgc2V0IHdpdGggYSB2YWx1ZSBuYW1lZCBhdmcgdGhhdCBpcyB0aGUgbWVhbiBkaWZmIHZhbHVlLgpoZmxpZ2h0cyAlPiUgbXV0YXRlKGRpZmY9KFRheGlJbi1UYXhpT3V0KSkgJT4lIGZpbHRlcihpcy5uYShkaWZmKSkgJT4lIHN1bW1hcmlzZShhdmc9bWVhbihkaWZmKSkKClN0YW5kYXJkCgpg
YGB7cn0KYXJyYW5nZShmaWx0ZXIoc2VsZWN0KGJhbmtEYXRhLCBhZ2UsIGpvYiwgZWR1Y2F0aW9uLCBkZWZhdWx0KSwgZGVmYXVsdCA9PSAneWVzJyksIGpvYiwgZWR1Y2F0aW9uLCBhZ2UpCmBgYAoKV2l0aCBQaXBlcwoKYGBge3J9CmFycmFuZ2UoCiAgZmlsdGVyKAogICAgc2VsZWN0KGJhbmtEYXRhLCBhZ2UsIGpvYiwgZWR1Y2F0aW9uLCBkZWZhdWx0KSwgCiAgICBkZWZhdWx0ID09ICd5ZXMnKSwgCiAgam9iLCBlZHVjYXRpb24sIGFnZSkKYGBgCgpQaXBpbmcgZXhhbXBsZQpUaGUgcGlwZSBvcGVyYXRvciBpcyB2ZXJ5IGhlbHBmdWwgZm9yIGdyb3VwIGJ5IHN1bW1hcmllcwoKCmBgYHtyfQpiYW5rRGF0YSAlPiUgCiAgc2VsZWN0KGFnZSwgam9iLCBlZHVjYXRpb24sIGRlZmF1bHQpICU+JQogIGZpbHRlcihkZWZhdWx0ID09ICd5ZXMnKSAlPiUKICBhcnJhbmdlKGpvYiwgZWR1Y2F0aW9uLCBhZ2UpCmBgYAoKTm8gUGlwZXMKCmBgYHtyfQp4MSA8LSBybm9ybSgxMCkKeDIgPC0gcm5vcm0oMTApCnNxcnQoc3VtKCh4MSAtIHgyKV4yKSkKYGBgCgpXaXRoIFBpcGVzCgpgYGB7cn0KKHgxIC0geDIpXjIgJT4lIHN1bSgpICU+JSBzcXJ0KCkKYGBgClBpcGUgKyBncm91cF9ieSgpClRoZSBwaXBlIG9wZXJhdG9yIGlzIHZlcnkgaGVscGZ1bCBmb3IgZ3JvdXAgYnkgc3VtbWFyaWVzCgoKYGBge3J9CmJhbmtEYXRhICU+JSBncm91cF9ieShqb2IpICU+JQogIHN1bW1hcmlzZShOdW1iZXIgPSBuKCksCiAgICAgICAgICAgIEF2ZXJhZ2UuQmFsYW5jZSA9IG1lYW4oYmFsYW5jZSksCiAgICAgICAgICAgIE51bWJlci5EZWZhdWx0ZWQgPSBzdW0oZGVmYXVsdCA9PSAneWVzJyksCiAgICAgICAgICAgIERlZmF1bHQuUmF0ZSA9IE51bWJlci5EZWZhdWx0ZWQvTnVtYmVyKQpgYGAKClBpcGUgYW5kIFBsb3RpbmcKCgpgYGB7cn0KbGlicmFyeShnZ3Bsb3QyKQpiYW5rRGF0YSAlPiUgCiAgZmlsdGVyKGpvYiAlaW4lIGMoIm1hbmFnZW1lbnQiLCAidGVjaG5pY2lhbiIsICJ1bmVtcGxveWVkIikpICU+JQogIGdyb3VwX2J5KGpvYiwgbWFyaXRhbCkgJT4lIAogIHN1bW1hcmlzZShDb3VudHMgPSBuKCkgKSAlPiUgCiAgZ2dwbG90KCkgKyAKICBnZW9tX2JhcihhZXMoeCA9IGpvYiwgeSA9IENvdW50cywgZmlsbCA9IG1hcml0YWwpLAogICAgICAgICAgIHN0YXQgPSAnaWRlbnRpdHknLCBwb3NpdGlvbiA9ICdkb2RnZScpCmBgYAoKUGlwaW5nOiBVbmlxdWUgVmFsdWVzClBpcGluZyBpcyBhbHNvIHZlcnkgaGVscGZ1bCB3aXRoIGlkZW50aWZ5aW5nIHVuaXF1ZSByb3dzLiBZb3UgY2FuIGFsc28KdXNlIGRpc3RpbmN0KCkgdG8gaWRlbnRpZnkgdW5pcXVlIHJvd3MgYW5kIGlzIHR5cGljYWxseSB1c2VkIHdpdGgKYXJyYW5nZSgpLgoKCmBgYHtyfQpiYW5rRGF0YSAlPiUgCiAgc2VsZWN0KGpvYiwgbWFyaXRhbCwgZWR1Y2F0aW9uLCBkZWZhdWx0LCBob3VzaW5nLCBsb2FuLCBjb250YWN0KSAlPiUKICBhcnJhbmdlKGpvYiwgbWFyaXRhbCwgZWR1Y2F0aW9uLCBk
ZWZhdWx0LCBob3VzaW5nLCBsb2FuLCBjb250YWN0KSAlPiUKICBkaXN0aW5jdCgpCmBgYAoKClVuaXF1ZSBLZXlzCllvdSBjYW4gc3BlY2lmeSB2YXJpYWJsZXMgdGhhdCB5b3Ugb25seSB3YW50IHVuaXF1ZSB2YWx1ZXMgZm9yLgoKCmBgYHtyfQpiYW5rRGF0YSAlPiUgCiAgc2VsZWN0KGpvYiwgbWFyaXRhbCwgZWR1Y2F0aW9uLCBkZWZhdWx0LCBob3VzaW5nLCBsb2FuLCBjb250YWN0KSAlPiUKICBhcnJhbmdlKGpvYiwgbWFyaXRhbCwgZWR1Y2F0aW9uLCBkZWZhdWx0LCBob3VzaW5nLCBsb2FuLCBjb250YWN0KSAlPiUKICBkaXN0aW5jdChqb2IsIG1hcml0YWwsIGVkdWNhdGlvbikKYGBgCgpVbmlxdWUgS2V5cwpJdCB3aWxsIGtlZXAgdGhlIGZpcnN0IHJvdyB3aXRoIHRob3NlIHBhcnRpY3VsYXIga2V5IHZhbHVlcy4KCmBgYHtyfQpiYW5rRGF0YSAlPiUgCiAgc2VsZWN0KGpvYiwgbWFyaXRhbCwgZWR1Y2F0aW9uLCBkZWZhdWx0LCBob3VzaW5nLCBsb2FuLCBjb250YWN0KSAlPiUKICBhcnJhbmdlKGpvYiwgbWFyaXRhbCwgZWR1Y2F0aW9uLCBkZXNjKGRlZmF1bHQpLCBkZXNjKGhvdXNpbmcpLCBkZXNjKGxvYW4pLCBkZXNjKGNvbnRhY3QpKSAlPiUKICBkaXN0aW5jdChqb2IsIG1hcml0YWwsIGVkdWNhdGlvbikKYGBgCgoKTXVsdGlwbGUgQ29sdW1ucwpZb3UgY2FuIHN1bW1hcmlzZSBvciBtdXRhdGUgbXVsdGlwbGUgY29sdW1ucyB1c2luZyB0aGUgc2FtZQpncm91cGluZyB2YXJpYWJsZS4Kc3VtbWFyaXNlX2VhY2ggYWxsb3dzIHlvdSB0byBhcHBseSB0aGUgc2FtZSBzdW1tYXJ5CmZ1bmN0aW9uIHRvIG11bHRpcGxlIGNvbHVtbnMKbXV0YXRlX2VhY2ggYWxzbyBkb2VzIGEgc2ltaWxhciBtYW5pcHVsYXRpb24gZm9yIG11dGF0ZQoKCmBgYHtyfQpoZWxwKHN1bW1hcmlzZV9lYWNoKQoKYGBgCgpTdW1tYXJpc2VfZWFjaCBFeGFtcGxlCgpgYGB7cn0KYmFua0RhdGEgJT4lCiAgZ3JvdXBfYnkoZWR1Y2F0aW9uKSAlPiUKICBzdW1tYXJpc2VfZWFjaChmdW5zKG1lYW4pLCBiYWxhbmNlLCBkdXJhdGlvbikKYGBgCgpzdW1tYXJpc2VfZWFjaCBFeGFtcGxlIDIKWW91IGNhbiBhbHNvIHVzZSBtdWx0aXBsZSBmdW5jdGlvbnMuCgpgYGB7cn0KYmFua0RhdGEgJT4lCiAgZ3JvdXBfYnkoZWR1Y2F0aW9uKSAlPiUKICBzdW1tYXJpc2VfZWFjaChmdW5zKG1pbiwgbWVhbiwgbWF4KSwgYmFsYW5jZSwgZHVyYXRpb24pCmBgYAoKbXV0YXRlX2VhY2ggRXhhbXBsZQpZb3UgY2FuIHVzZSB0aGUgLiB0byBpbmRpY2F0ZSB3aGVyZSB0aGUgdmFyaWFibGVzIGdvIGluIGFuIGFyYml0cmFyeQpmdW5jdGlvbi4KCmBgYHtyfQpiYW5rRGF0YSAlPiUgCiAgZ3JvdXBfYnkobW9udGgpICU+JSAKICBzZWxlY3QoYmFsYW5jZSwgZHVyYXRpb24pICU+JSAKICBtdXRhdGVfZWFjaChmdW5zKGhhbGYgPSAuLzIpKQpgYGAKCkFkZGl0aW9uYWwgSGVscGVyIEZ1bmN0aW9ucwpIZWxwZXIgZnVuY3Rpb25zIG4oKSBhbmQgY291bnQoKSBjb3VudCB0aGUgbnVtYmVyIG9m
IHJvd3MKaW4gYSBncm91cApIZWxwZXIgZnVuY3Rpb24gbl9kaXN0aW5jdCh2ZWN0b3IpIGNvdW50cyB0aGUgbnVtYmVyIG9mCnVuaXF1ZSBpdGVtcyBpbiB0aGF0IHZlY3RvcgoKYGBge3J9CmJhbmtEYXRhICU+JSAKICBncm91cF9ieShqb2IsIGRlZmF1bHQpICU+JQogIHN1bW1hcmlzZShlZHVjYXRpb25fbGV2ZWxzID0gbl9kaXN0aW5jdChlZHVjYXRpb24pKQpgYGAKCgp0YWxseQp0YWxseSgpIGlzIGEgc2hvcnRjdXQgZm9yIGNvdW50aW5nCgoKYGBge3J9CmJhbmtEYXRhICU+JSBncm91cF9ieShqb2IpICU+JQogIHRhbGx5KCkKYGBgCgoKV2l0aG91dCB0YWxseSgpCgoKYGBge3J9CmJhbmtEYXRhICU+JSBncm91cF9ieShqb2IpICU+JQogIHN1bW1hcmlzZShuID0gbigpKQpgYGAKCgpjb3VudApjb3VudCgpIG1ha2VzIGl0IGV2ZW4gZWFzaWVyLgoKCmBgYHtyfQpiYW5rRGF0YSAlPiUgY291bnQoam9iKQpgYGAKCgoKUmFua2luZyBWYXJpYWJsZXMKSW4gYmFzZSBSLCB5b3UgY2FuIHVzZSByYW5rLgoKYGBge3J9CmFyZ3MocmFuaykKYGBgCgpSYW5rIEV4YW1wbGVzCgpgYGB7cn0KYmFua0RhdGEgJT4lIHNsaWNlKDE6MTApICU+JSAKICB0cmFuc211dGUoSm9iID0gam9iLAogICAgICAgICAgICBqb2JSYW5rQXZnID0gcmFuayhqb2IpLCAKICAgICAgICAgICAgam9iUmFua1JvdyA9IHJvd19udW1iZXIoam9iKSwgCiAgICAgICAgICAgIGpvYlJhbmtNaW4gPSBtaW5fcmFuayhqb2IpLAogICAgICAgICAgICBqb2JSYW5rRGVuc2UgPSBkZW5zZV9yYW5rKGpvYiksCiAgICAgICAgICAgIGpvYlJhbmtQZXJjID0gcGVyY2VudF9yYW5rKGpvYiksCiAgICAgICAgICAgIGpvYlJhbmtDdW1lID0gY3VtZV9kaXN0KGpvYikpCmBgYAoKCkFwcGx5aW5nIGN1c3RvbSBmdW5jdGlvbnMKWW91IGNhbiBhbHNvIGFwcGx5IHlvdXIgb3duIGN1c3RvbSBmdW5jdGlvbnMgdXNpbmcgZG8oKQoKCmBgYHtyfQpzZXQuc2VlZCgxKQpkZiA8LSBkYXRhLmZyYW1lKAogIGhvdXNlSUQgPSByZXAoMToxMCwgZWFjaCA9IDEwKSwgCiAgeWVhciA9IDE5OTU6MjAwNCwgCiAgcHJpY2UgPSBpZmVsc2UocnVuaWYoMTAgKiAxMCkgPiAwLjUwLCBOQSwgZXhwKHJub3JtKDEwICogMTApKSkKKQpoZWFkKGRmKQpgYGAKCkdyb3VwZWQgVGVzdHMKCgpgYGB7cn0KYmFua0RhdGEgJT4lIAogIGZpbHRlcihtYXJpdGFsICVpbiUgYygnbWFycmllZCcsICdzaW5nbGUnKSkgJT4lIAogIGdyb3VwX2J5KGpvYikgJT4lIAogIGRvKHRUZXN0ID0gdC50ZXN0KGFnZSB+IG1hcml0YWwsIGRhdGEgPSAuKSkgJT4lCiAgbXV0YXRlKCJ0VGVzdFBWYWwiID0gZ2V0KCJwLnZhbHVlIix0VGVzdCksICJ0VGVzdFN0YXQiID0gZ2V0KCJzdGF0aXN0aWMiLCB0VGVzdCkpCmBgYAoKQ2hhbmdpbmcgZ2VhcnPigKYKTGV0J3MgdGFrZSBhIGxvb2sgYXQgYSBuZXcgZGF0YXNldDoKCkRhdGEgYXJlIGZyb20gaHR0cHM6Ly9zdGF0LmR1a2UuZWR1L35tYzMwMS9kYXRhL2hkaS5jc3YKClRoZSBIdW1hbiBE
ZXZlbG9wbWVudCBJbmRleCAoSERJKSBpcyBhIGNvbXBvc2l0ZSBzdGF0aXN0aWMgb2YgbGlmZSBleHBlY3RhbmN5LCBlZHVjYXRpb24sIGFuZCBwZXIgY2FwaXRhIGluY29tZSBpbmRpY2F0b3JzLCB3aGljaCBhcmUgdXNlZCB0byByYW5rIGNvdW50cmllcyBpbnRvIGZvdXIgdGllcnMgb2YgaHVtYW4gZGV2ZWxvcG1lbnQuCgpBZGFwdGlvbiBvZiBleGFtcGxlIGJ5IE1pbmUgQ2V0aW5rYXlhLVJ1bmRlbAoKdGlkeXIKCkEgcGFja2FnZSB0aGF0IHJlc2hhcGVzIHRoZSBsYXlvdXQgb2YgdGFidWxhciBkYXRhLgoKaHR0cDovL3ZpdGEuaGFkLmNvLm56L3BhcGVycy90aWR5LWRhdGEuaHRtbAoKdGlkeXIgT3BlcmF0aW9ucwoKVGhlcmUgYXJlIGZvdXIgZnVuZGFtZW50YWwgZnVuY3Rpb25zIG9mIGRhdGEgdGlkeWluZzoKCmdhdGhlcigpIHRha2VzIG11bHRpcGxlIGNvbHVtbnMsIGFuZCBnYXRoZXJzIHRoZW0gaW50byBrZXktdmFsdWUgcGFpcnM6IGl0IG1ha2VzIOKAnHdpZGXigJ0gZGF0YSBsb25nZXIKc3ByZWFkKCkgdGFrZXMgdHdvIGNvbHVtbnMgKGtleSAmIHZhbHVlKSBhbmQgc3ByZWFkcyBpbiB0byBtdWx0aXBsZSBjb2x1bW5zLCBpdCBtYWtlcyDigJxsb25n4oCdIGRhdGEgd2lkZXIKc2VwYXJhdGUoKSBzcGxpdHMgYSBzaW5nbGUgY29sdW1uIGludG8gbXVsdGlwbGUgY29sdW1ucwp1bml0ZSgpIGNvbWJpbmVzIG11bHRpcGxlIGNvbHVtbnMgaW50byBhIHNpbmdsZSBjb2x1bW4KCgoKYGBge3J9CgpoZGkgPC0gcmVhZF9jc3YoImh0dHBzOi8vc3RhdC5kdWtlLmVkdS9+bWMzMDEvZGF0YS9oZGkuY3N2IikKClZpZXcoaGRpKQoKYGBgCgpXaWRlIHRvIGxvbmcgZGF0YSB3aXRoIGdhdGhlcgoKYGBge3J9CmxpYnJhcnkodGlkeXIpCmxpYnJhcnkoc3RyaW5ncikKaGRpX2xvbmcgPC0gZ2F0aGVyKGhkaSwga2V5ID0geWVhciwgdmFsdWUgPSBoZF9pbmRleCwgaGRpXzE5ODA6aGRpXzIwMTEpCgpWaWV3KGhkaV9sb25nKQoKYGBgCgpMZXQncyBkbyBhIGxpdHRsZSBiaXQgYmV0dGVy4oCmCgpgYGB7cn0KaGRpX2xvbmcyIDwtIGhkaV9sb25nICU+JQogIG11dGF0ZSh5ZWFyID0gYXMubnVtZXJpYyhzdHJfcmVwbGFjZSh5ZWFyLCAiaGRpXyIsICIiKSkpCgpWaWV3KGhkaV9sb25nMikKYGBgCgpMb25nIHRvIHdpZGUgZGF0YSB3aXRoIHNwcmVhZAoKYGBge3J9CmhkaV93aWRlIDwtIHNwcmVhZChoZGlfbG9uZzIsIGtleSA9IHllYXIsIHZhbHVlID0gaGRfaW5kZXgpCgpWaWV3KGhkaV93aWRlKQpgYGAKCgpUaGlzIGlzIGp1c3QgYSBzbWFsbCBwZWFrIGF0IHRoZSBmdW5jdGlvbnMgYW5kIHBvd2VyIG9mIHRoZSB0aWR5dmVyc2UuCg==